{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Model Selection & HyperParameter Tuning.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyP+aA3vsymW+gd2FUlItLAW",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Divyanshu-ISM/Machine-Learning-Deep-Learning/blob/main/Model_Selection_%26_HyperParameter_Tuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3zenZjkUW8OK"
      },
      "source": [
        "# Model Selection & HyperParameter Tuning\r\n",
        "\r\n",
        "> ### 1. GridSearchCV\r\n",
        "> ### 2. Randomized Search CV"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pUvSMONEWytY"
      },
      "source": [
        "import numpy as np\r\n",
        "import pandas as pd\r\n",
        "import seaborn as sns\r\n",
        "import matplotlib.pyplot as plt"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gltJUK1TXP-p"
      },
      "source": [
        "from sklearn.datasets import load_iris\r\n",
        "\r\n",
        "iris = load_iris()\r\n",
        "X = iris.data\r\n",
        "y = iris.target"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4fAZUeKjXpQr"
      },
      "source": [
        "#Optional\r\n",
        "df = pd.DataFrame(data=X,columns=iris.feature_names)\r\n",
        "df['Species'] = y\r\n",
        "# df.head()\r\n",
        "namer = lambda x : iris.target_names[x]\r\n",
        "df['Species'] = df['Species'].apply(namer)"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "id": "5CVpcGvSXqIt",
        "outputId": "1c368add-ec6f-4818-c67c-65dc48863d81"
      },
      "source": [
        "df.head()"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sepal length (cm)</th>\n",
              "      <th>sepal width (cm)</th>\n",
              "      <th>petal length (cm)</th>\n",
              "      <th>petal width (cm)</th>\n",
              "      <th>Species</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>5.1</td>\n",
              "      <td>3.5</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4.9</td>\n",
              "      <td>3.0</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>4.7</td>\n",
              "      <td>3.2</td>\n",
              "      <td>1.3</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4.6</td>\n",
              "      <td>3.1</td>\n",
              "      <td>1.5</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5.0</td>\n",
              "      <td>3.6</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   sepal length (cm)  sepal width (cm)  ...  petal width (cm)  Species\n",
              "0                5.1               3.5  ...               0.2   setosa\n",
              "1                4.9               3.0  ...               0.2   setosa\n",
              "2                4.7               3.2  ...               0.2   setosa\n",
              "3                4.6               3.1  ...               0.2   setosa\n",
              "4                5.0               3.6  ...               0.2   setosa\n",
              "\n",
              "[5 rows x 5 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7Quwowz-YFgW"
      },
      "source": [
        "from sklearn.model_selection import GridSearchCV"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3zKLYYuwYZul"
      },
      "source": [
        "from sklearn.svm import SVC\r\n",
        "from sklearn.ensemble import RandomForestClassifier\r\n",
        "from sklearn.naive_bayes import GaussianNB"
      ],
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-GRS35ufYVf1",
        "outputId": "af5e8e8a-e737-4312-c880-4719b243396b"
      },
      "source": [
        "#Instantiate a Grid-Search Object\r\n",
        "grid = GridSearchCV(SVC(gamma='auto'),\r\n",
        "                    {'C':np.linspace(1,30),\r\n",
        "                     'kernel':['rbf','linear']},\r\n",
        "                    cv=5,return_train_score=False)\r\n",
        "\r\n",
        "#Fit the Grid-Search object to data. \r\n",
        "grid.fit(X,y)\r\n",
        "grid.best_params_"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'C': 1.5918367346938775, 'kernel': 'rbf'}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "id": "i5DSvYTvZqvt",
        "outputId": "4db5a871-776b-4147-f727-e856c0ea602f"
      },
      "source": [
        "grid_results = pd.DataFrame(grid.cv_results_)\r\n",
        "grid_results = grid_results[['param_C','param_kernel','mean_test_score']]\r\n",
        "grid_results.head()"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>param_C</th>\n",
              "      <th>param_kernel</th>\n",
              "      <th>mean_test_score</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>rbf</td>\n",
              "      <td>0.980000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>linear</td>\n",
              "      <td>0.980000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1.59184</td>\n",
              "      <td>rbf</td>\n",
              "      <td>0.986667</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1.59184</td>\n",
              "      <td>linear</td>\n",
              "      <td>0.986667</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2.18367</td>\n",
              "      <td>rbf</td>\n",
              "      <td>0.980000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   param_C param_kernel  mean_test_score\n",
              "0        1          rbf         0.980000\n",
              "1        1       linear         0.980000\n",
              "2  1.59184          rbf         0.986667\n",
              "3  1.59184       linear         0.986667\n",
              "4  2.18367          rbf         0.980000"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3ozp-cKrZ3Zb",
        "outputId": "8008ef54-c3c6-4547-c128-6866d417c4f7"
      },
      "source": [
        "max(grid_results['mean_test_score'])"
      ],
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.9866666666666667"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5rCjtjRkbX49"
      },
      "source": [
        "# Selecting the best Model out of all the prospects "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-cnz_lq9bCje"
      },
      "source": [
        "from sklearn import svm\r\n",
        "from sklearn.ensemble import RandomForestClassifier\r\n",
        "from sklearn.linear_model import LogisticRegression\r\n",
        "\r\n",
        "model_params = {\r\n",
        "    'svm': {\r\n",
        "        'model': svm.SVC(gamma='auto'),\r\n",
        "        'params' : {\r\n",
        "            'C': [1,10,20],\r\n",
        "            'kernel': ['rbf','linear']\r\n",
        "        }  \r\n",
        "    },\r\n",
        "    'random_forest': {\r\n",
        "        'model': RandomForestClassifier(),\r\n",
        "        'params' : {\r\n",
        "            'n_estimators': [1,5,10]\r\n",
        "        }\r\n",
        "    },\r\n",
        "    'logistic_regression' : {\r\n",
        "        'model': LogisticRegression(solver='liblinear',multi_class='auto'),\r\n",
        "        'params': {\r\n",
        "            'C': [1,5,10]\r\n",
        "        }\r\n",
        "    }\r\n",
        "}"
      ],
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 136
        },
        "id": "yHHYysdjbzoZ",
        "outputId": "730751a6-0c8d-4546-b7f6-9605881793c2"
      },
      "source": [
        "scores = []\r\n",
        "\r\n",
        "for model_name, mp in model_params.items():\r\n",
        "    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)\r\n",
        "    clf.fit(iris.data, iris.target)\r\n",
        "    scores.append({\r\n",
        "        'model': model_name,\r\n",
        "        'best_score': clf.best_score_,\r\n",
        "        'best_params': clf.best_params_\r\n",
        "    })\r\n",
        "    \r\n",
        "modelDF = pd.DataFrame(scores,columns=['model','best_score','best_params'])\r\n",
        "modelDF"
      ],
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>model</th>\n",
              "      <th>best_score</th>\n",
              "      <th>best_params</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>svm</td>\n",
              "      <td>0.980000</td>\n",
              "      <td>{'C': 1, 'kernel': 'rbf'}</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>random_forest</td>\n",
              "      <td>0.966667</td>\n",
              "      <td>{'n_estimators': 1}</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>logistic_regression</td>\n",
              "      <td>0.966667</td>\n",
              "      <td>{'C': 5}</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                 model  best_score                best_params\n",
              "0                  svm    0.980000  {'C': 1, 'kernel': 'rbf'}\n",
              "1        random_forest    0.966667        {'n_estimators': 1}\n",
              "2  logistic_regression    0.966667                   {'C': 5}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 24
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HX8MPyY-cjB9"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}